Visualização de Dados

Visualização de Dados

Referências

Datasets

Obtém informações do conjunto de dados AutoMPG Dataset

library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# NOTE(review): `mpg` is presumably the fuel-economy dataset shipped with
# ggplot2 (attached above as a dependency of plotly) rather than a member of
# the `datasets` package -- confirm.
library(datasets)
# Number of rows in mpg.
nrow(mpg)
## [1] 234
# Number of columns in mpg.
ncol(mpg)
## [1] 11

Gráfico de Linhas

library(plotly)

# Three synthetic series of 100 normal draws each, centred on 5, 0 and -5,
# plotted against the index 1..100.
trace_0 <- rnorm(100, mean = 5)
trace_1 <- rnorm(100, mean = 0)
trace_2 <- rnorm(100, mean = -5)
x <- 1:100

data <- data.frame(x, trace_0, trace_1, trace_2)

# plot_ly() only supplies the shared data and the x mapping. Each add_trace()
# declares type = "scatter" explicitly so plotly does not have to infer it
# (which emitted a "No trace type specified" message once per trace).
fig <- plot_ly(data, x = ~x)
fig <- fig %>%
  add_trace(y = ~trace_0, name = "Padrão 1", type = "scatter", mode = "lines")
fig <- fig %>%
  add_trace(y = ~trace_1, name = "Padrão 2", type = "scatter", mode = "lines+markers")
fig <- fig %>%
  add_trace(y = ~trace_2, name = "Padrão 3", type = "scatter", mode = "markers")

fig

Gráfico de Barras

Segue um gráfico de barras para o conjunto AutoMPG Dataset:

# Bar trace over the mpg data: car model on the x axis, `year` on the y axis.
fig <- mpg %>%
  plot_ly(
    x = ~model,
    y = ~year,
    type = "bar"
  )

fig

Filtrando instâncias

# Compact overview of mpg: one line per column with its type and first values.
str(mpg)
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...

Contando os valores do atributo “manufacturer” e pegando esses valores:

# Number of rows per manufacturer; table() orders its cells by sorted label.
counts <- table(mpg$manufacturer)

# Take the labels from the table itself so that values[i] always describes
# counts[i]. unique() returns labels in order of first appearance, which only
# lines up with table()'s sorted order when the data happen to be sorted.
values <- names(counts)

Nosso primeiro histograma:

# Bar chart of the number of cars per manufacturer.
# Labels are read from the count table so each bar is paired with its own
# count, and the table is flattened to a plain vector for the y values.
manufacturers <- names(counts)
fig <- plot_ly(
  x = manufacturers,
  y = as.vector(counts),
  type = "bar",
  name = manufacturers
)
# The axes show manufacturers and how many rows each one has, so label them
# accordingly (the previous titles said "Modelo" and "Ano").
fig <- fig %>%
  layout(
    xaxis = list(title = "Fabricante"),
    yaxis = list(title = "Quantidade")
  )
fig

Podemos testar também o gráfico de barras empilhado:

# Number of rows per manufacturer (kept for parity with the earlier chunk).
counts <- table(mpg$manufacturer)

# Long-format counts per (manufacturer, class) pair: one row per combination
# with the number of cars in column `Freq`.
class_counts <- as.data.frame(
  table(manufacturer = mpg$manufacturer, class = mpg$class),
  stringsAsFactors = FALSE
)

# One bar trace per vehicle class; barmode = "stack" piles the class segments
# on top of each other so every bar's total height is the manufacturer count.
fig <- plot_ly(
  data = class_counts,
  x = ~manufacturer,
  y = ~Freq,
  color = ~class,
  type = "bar"
)
fig <- fig %>% layout(yaxis = list(title = "Count"), barmode = "stack")

fig

Vamos comparar agora apenas os carros das fabricantes Toyota e Volkswagen

# Row masks selecting each manufacturer, then the matching subsets of mpg.
is_toyota <- mpg$manufacturer == "toyota"
is_volks <- mpg$manufacturer == "volkswagen"
toyota <- mpg[is_toyota, ]
volks <- mpg[is_volks, ]

Como fica a Toyota:

# Print the Toyota subset.
toyota

Como fica Volkswagen:

# Print the Volkswagen subset.
volks

Gráfico de Pizza

# Pie chart of the Volkswagen rows by model year.
# Only `labels` is supplied: with no `values`, each slice is sized by how many
# times its label occurs. Pie traces have no `x` attribute, so it is not
# passed (doing so only produced a warning).
fig <- plot_ly(data = volks, labels = ~year, type = "pie")

fig

Gráfico de Pizza

# Pie chart of the Toyota rows by vehicle class.
# Only `labels` is supplied: with no `values`, each slice is sized by how many
# times its label occurs. Pie traces have no `x` attribute, so it is not
# passed (doing so only produced a warning).
fig <- plot_ly(data = toyota, labels = ~class, type = "pie")

fig

Box Plot

# Side-by-side box plots of `displ` for Volkswagen and Toyota.
# The traces are labelled "Displacement": `displ` is engine displacement, not
# a price, so the previous "Prices ..." names mislabelled the data.
fig <- plot_ly(y = volks$displ, type = "box", name = "Displacement Volkswagen")
# The second trace inherits type = "box" from plot_ly().
fig <- fig %>% add_trace(y = toyota$displ, name = "Displacement Toyota")


fig
# Five-number summary plus mean of the Volkswagen values, for comparison
# with the box drawn above.
summary(volks$displ)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.800   2.000   2.000   2.256   2.500   3.600
# Same comparison as a box plot, with boxpoints = "all" so the individual
# observations are drawn next to each box.
# The traces are labelled "Displacement": `displ` is engine displacement, not
# a price, so the previous "Prices ..." names mislabelled the data.
fig <- plot_ly(
  y = volks$displ,
  type = "box",
  boxpoints = "all",
  name = "Displacement Volkswagen"
)
# The second trace inherits type and boxpoints from plot_ly().
fig <- fig %>% add_trace(y = toyota$displ, name = "Displacement Toyota")


fig

Mapas

# Download the 2014 world GDP table (with country codes) from the plotly
# datasets repository.
df <- read.csv(
  file = "https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv"
)

# Uncomment to inspect the loaded table:
#df
#table(df)

Plota o mapa com as informações solicitadas

# World choropleth: countries are located by the CODE column, shaded by the
# GDP column on a blue scale, with the country name as text.
fig <- df %>%
  plot_ly(
    type = "choropleth",
    locations = df$CODE,
    z = df$GDP..BILLIONS.,
    text = df$COUNTRY,
    colorscale = "Blues"
  )

fig

Gráfico de Dispersão

# Scatter plot of petal length against sepal length for the iris data.
# type and mode are stated explicitly; they match what plotly would otherwise
# infer, but spelling them out avoids the "No trace type specified" /
# "No scatter mode specified" messages.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  y = ~Petal.Length,
  type = "scatter",
  mode = "markers"
)

fig
library(plotly)

# Named palette: each species is bound to a fixed colour by name.
pal <- c("red", "blue", "green")
pal <- setNames(pal, c("virginica", "setosa", "versicolor"))

# Same scatter plot, coloured by species. type and mode are stated explicitly;
# they match what plotly would otherwise infer, but spelling them out avoids
# the "No trace type specified" / "No scatter mode specified" messages.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  y = ~Petal.Length,
  color = ~Species,
  colors = pal,
  type = "scatter",
  mode = "markers"
)

fig
# Named palette: each species is bound to a fixed colour by name.
pal <- c("red", "blue", "green")
pal <- setNames(pal, c("virginica", "setosa", "versicolor"))

# Species-coloured scatter plot with larger markers and a thin black outline.
# type and mode are stated explicitly; they match what plotly would otherwise
# infer, but spelling them out avoids the "No trace type specified" /
# "No scatter mode specified" messages.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  y = ~Petal.Length,
  color = ~Species,
  colors = pal,
  type = "scatter",
  mode = "markers",
  marker = list(
    size = 8,
    line = list(color = "black", width = 1)
  )
)

fig
# Named palette (not used by the plot below, which draws every point in black).
pal <- c("red", "blue", "green")
pal <- setNames(pal, c("virginica", "setosa", "versicolor"))

# Monochrome scatter plot that tells the species apart by marker symbol.
# type = "scatter" is stated explicitly so plotly does not have to infer it
# (which emitted a "No trace type specified" message).
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  y = ~Petal.Length,
  color = I("black"),
  type = "scatter",
  mode = "markers",
  symbol = ~Species,
  symbols = c("circle", "x", "o")
)

fig

Matrizes de gráficos de dispersão

# Axis styling shared by the secondary axes (2..4) of the scatter-plot matrix.
axis <- list(
  showline = FALSE,
  zeroline = FALSE,
  gridcolor = "#ffff",
  ticklen = 4,
  titlefont = list(size = 13)
)

# Scatter-plot matrix of the four iris measurements, coloured by species.
# Built as a single pipeline: trace -> hide diagonal panels -> layout.
fig <- iris %>%
  plot_ly() %>%
  add_trace(
    type = "splom",
    dimensions = list(
      list(label = "sepal length", values = ~Sepal.Length),
      list(label = "sepal width", values = ~Sepal.Width),
      list(label = "petal length", values = ~Petal.Length),
      list(label = "petal width", values = ~Petal.Width)
    ),
    color = ~Species,
    colors = c("#636EFA", "#EF553B", "#00CC96"),
    marker = list(
      size = 8,
      line = list(color = "black", width = 1)
    )
  ) %>%
  style(diagonal = list(visible = FALSE)) %>%
  layout(
    hovermode = "closest",
    dragmode = "select",
    plot_bgcolor = "rgba(240,240,240, 0.95)",
    # The primary axes are styled inline (no title font); axes 2..4 reuse `axis`.
    xaxis = list(
      domain = NULL, showline = FALSE, zeroline = FALSE,
      gridcolor = "#ffff", ticklen = 4
    ),
    yaxis = list(
      domain = NULL, showline = FALSE, zeroline = FALSE,
      gridcolor = "#ffff", ticklen = 4
    ),
    xaxis2 = axis,
    xaxis3 = axis,
    xaxis4 = axis,
    yaxis2 = axis,
    yaxis3 = axis,
    yaxis4 = axis
  )

fig

Mapa de Calor (Heatmap)

# Heatmap of the `volcano` matrix: each cell is coloured by its value.
fig <- plot_ly(
  z = volcano,
  type = "heatmap"
)

fig
# Keep only the four numeric measurement columns of iris.
dados <- iris[, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")]

# Optional standardisation step, left disabled:
#dados.scaled = scale(dados, center = T, scale = T)

# Pairwise correlation matrix of the four measurements.
corrIris <- cor(dados)

corrIris
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    1.0000000  -0.1175698    0.8717538   0.8179411
## Sepal.Width    -0.1175698   1.0000000   -0.4284401  -0.3661259
## Petal.Length    0.8717538  -0.4284401    1.0000000   0.9628654
## Petal.Width     0.8179411  -0.3661259    0.9628654   1.0000000
# Axis labels for the correlation heatmap, in the same order as the columns
# of corrIris. A character vector (rather than a list) is the usual form for
# categorical axis values.
dimensions <- c("sepal length", "sepal width", "petal length", "petal width")

# Greyscale heatmap of the correlation matrix.
fig <- plot_ly(
  x = dimensions,
  y = dimensions,
  z = corrIris,
  type = "heatmap",
  colors = "Greys"
)

fig

Visualização baseada no posicionamento de pontos

Podemos empregar uma técnica de redução de dimensionalidade para transformar os dados originais definidos em um espaço de alta dimensão para um espaço de dimensão baixa. Para propósitos de visualização, a baixa dimensão é igual a \(2\).

Vamos apresentar duas técnicas de redução de dimensionalidade: Análise de Componentes Principais (Principal Component Analysis - PCA) e t-Distributed Stochastic Neighbor Embedding (t-SNE).

Análise de Componentes Principais (Principal Component Analysis - PCA)

Um bom tutorial sobre PCA pode ser encontrado neste link.

Variância acumulada das componentes principais:

library(plotly)
library(stats)

# Every iris column except the species label.
X <- iris[, setdiff(names(iris), "Species")]

# Principal component analysis keeping only the first two components.
prin_comp <- prcomp(X, rank. = 2)

# Standard deviation and (cumulative) proportion of variance per component.
summary(prin_comp)
## Importance of first k=2 (out of 4) components:
##                           PC1     PC2
## Standard deviation     2.0563 0.49262
## Proportion of Variance 0.9246 0.05307
## Cumulative Proportion  0.9246 0.97769

A proporção de variância concentrada pelas primeiras k componentes principais é dada por:

# Proportion of variance carried by each retained principal component.
explained_variance_ratio <-
  summary(prin_comp)[["importance"]]["Proportion of Variance", ]

# Running total of the proportions. Stored under its own name so the base
# function cumsum() is not shadowed by a variable of the same name.
cumulative_variance <- cumsum(explained_variance_ratio)

# One row per component: cumulative variance and the component index.
data <- data.frame(
  Explained_Variance = cumulative_variance,
  Components = seq_along(cumulative_variance)
)

# Area chart of cumulative explained variance against number of components,
# with one tick per component.
fig <- plot_ly(
  data = data,
  x = ~Components,
  y = ~Explained_Variance,
  type = "scatter",
  mode = "lines",
  fill = "tozeroy"
) %>%
  layout(
    xaxis = list(
      title = "# Componentes",
      tickvals = seq_along(cumulative_variance)
    ),
    yaxis = list(
      title = "Variância Concentrada"
    )
  )
fig

Visualizando o conjunto de dados Iris

# Projected coordinates (PC1, PC2) of every observation, with the species
# attached as a properly named column so the plot can refer to it directly
# instead of reaching out to the global iris object.
components <- data.frame(prin_comp[["x"]], Species = iris$Species)


# Scatter plot of the two principal components, coloured by species, with
# the species name as hover text. mode = "markers" is stated explicitly so
# plotly does not have to pick it (which emitted a "No scatter mode
# specified" message).
fig <- plot_ly(
  components,
  x = ~PC1,
  y = ~PC2,
  color = ~Species,
  colors = c("#636EFA", "#EF553B", "#00CC96"),
  type = "scatter",
  mode = "markers",
  text = ~Species,
  textposition = "auto",
  hoverinfo = "text",
  # Alternative hover layout, left disabled:
  #hovertemplate = paste('<i>PC1</i>: %{y:.2f}',
  #          '<br><b>PC2</b>: %{x}<br>',
  #          '<b>%{text}</b>'),
  marker = list(size = 8, line = list(color = "black", width = 1))
)


fig
#install.packages("tsne")

Agora vamos realizar a apresentação dos dados

library(tsne)

# Every iris column except the species label.
features <- iris[, setdiff(names(iris), "Species")]

# Fix the RNG seed before running t-SNE.
set.seed(0)
# The result is stored under the name `tsne`, which later code reads.
tsne <- tsne(
  features,
  initial_dims = 2,
  perplexity = 32
)
## Warning in if (class(X) == "dist") {: the condition has length > 1 and only the
## first element will be used
## sigma summary: Min. : 0.389524058138923 |1st Qu. : 0.465656663923391 |Median : 0.525013618999155 |Mean : 0.534049275364469 |3rd Qu. : 0.59709359081531 |Max. : 0.76775512516638 |
## Epoch: Iteration #100 error is: 10.7045314935631
## Epoch: Iteration #200 error is: 0.0543421795362035
## Epoch: Iteration #300 error is: 0.0521840519824475
## Epoch: Iteration #400 error is: 0.051380705408758
## Epoch: Iteration #500 error is: 0.0511819976879906
## Epoch: Iteration #600 error is: 0.0511223834873948
## Epoch: Iteration #700 error is: 0.0511048166892463
## Epoch: Iteration #800 error is: 0.0510989180454898
## Epoch: Iteration #900 error is: 0.0510969852365817
## Epoch: Iteration #1000 error is: 0.0510962153083171
# Embedding coordinates as a data frame (columns X1, X2).
tsne <- data.frame(tsne)
# Attach the species as a properly named column so the plot can refer to it
# directly instead of reaching out to the global iris object.
pdb <- cbind(tsne, Species = iris$Species)

# Scatter plot of the embedding, one trace per species. mode = "markers" is
# stated explicitly so plotly does not have to pick it (which emitted a
# "No scatter mode specified" message).
fig <- plot_ly(
  data = pdb,
  x = ~X1,
  y = ~X2,
  split = ~Species,
  type = "scatter",
  mode = "markers",
  marker = list(size = 8, line = list(color = "black", width = 1))
)

fig